library(survival)
library(KMsurv)
library(ggplot2)
library(ggpubr)
library(survminer)
library(plotly)
library(muhaz)
vars <- c("id", "clinic", "status", "time", "prison", "methadone")
addicts <-
read.table(
url(
"https://dmrocke.ucdavis.edu/Class/BST222.2022.Fall/addicts.txt"
),
header = F,
col.names = vars
)
#addicts
#change variables to factors to be used in Cox PH
addicts$clinic <- factor(addicts$clinic,labels=c("Clinic1","Clinic2"))
addicts$prison <- factor(addicts$prison,labels=c("No","Yes"))
head(addicts)
## id clinic status time prison methadone
## 1 1 Clinic1 1 428 No 50
## 2 2 Clinic1 1 275 Yes 55
## 3 3 Clinic1 1 262 No 55
## 4 4 Clinic1 1 183 No 30
## 5 5 Clinic1 1 259 Yes 65
## 6 6 Clinic1 1 714 No 55
surv_object <- Surv(time = addicts$time, event = addicts$status)
KMcurves <- survfit(surv_object ~ clinic, data = addicts)
KMplot <- ggsurvplot(KMcurves) + labs(title = "Tenure in the Clinic")
ggplotly(KMplot[[1]])
survdiff(surv_object ~ clinic, data = addicts)
## Call:
## survdiff(formula = surv_object ~ clinic, data = addicts)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## clinic=Clinic1 163 122 90.9 10.6 27.9
## clinic=Clinic2 75 28 59.1 16.4 27.9
##
## Chisq= 27.9 on 1 degrees of freedom, p= 1e-07
We can see that our p-value is below any reasonable \(\alpha\), allowing us to conlclude that there is a significant difference between the two clinics’ process.
NAcurves <-
survfit(surv_object ~ clinic, type = "fleming-harrington", data = addicts)
#Cumulative Hazard plot
cumHazPlot <-
ggsurvplot(
NAcurves,
fun = "cumhaz",
palette = c("#581845", "#FFC300"),
ggtheme = theme_light()
) + ggtitle("Cumulative Hazard for Two Clinics")
ggplotly(cumHazPlot[[1]])
# Complimentary log-log survival plot
cumHazPlot <-
ggsurvplot(
NAcurves,
fun = "cloglog",
palette = c("#581845", "#FFC300"),
ggtheme = theme_light()
) + ggtitle("Complimentary Log-Log for Two Clinics")
ggplotly(cumHazPlot[[1]])
For most of the plot (besides the very beginning), our two lines are parallel, suggesting our hazards are proportional. However, neither of these lines are straight, as they both have curving in their shape. This suggests that the Weibull distribution may not be appropriate for failure times in this data.
# Clinic is already a factor
class(addicts$clinic)
## [1] "factor"
# Building a Cox Model
addicts.cox <- coxph(surv_object ~ clinic, data = addicts)
summary(addicts.cox)
## Call:
## coxph(formula = surv_object ~ clinic, data = addicts)
##
## n= 238, number of events= 150
##
## coef exp(coef) se(coef) z Pr(>|z|)
## clinicClinic2 -1.0754 0.3412 0.2127 -5.057 4.26e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## clinicClinic2 0.3412 2.931 0.2249 0.5176
##
## Concordance= 0.574 (se = 0.022 )
## Likelihood ratio test= 30.99 on 1 df, p=3e-08
## Wald test = 25.57 on 1 df, p=4e-07
## Score (logrank) test = 27.92 on 1 df, p=1e-07
We can see that the hazard ratio for Clinic 2 vs Clinic 1 is \(e^{-1.0754}\), meaning that Clinic 2 has \(0.3412\) times the risk of death compared to Clinic 1. Our p-value is very small, smaller than any reasonable \(\alpha\), so we can conclude that our \(\beta\) is significantly different from zero.
Our 95% confidence interval for \(e^{Clinic_2}\) is the interval \((0.2249, 0.5176)\), which does not contain 1. Thus we can say that the risk for clinic 2 is significantly lower than clinic 1.
Ask professor
addicts.cox2 <- coxph(surv_object ~ clinic + prison + methadone, data = addicts)
summary(addicts.cox2)
## Call:
## coxph(formula = surv_object ~ clinic + prison + methadone, data = addicts)
##
## n= 238, number of events= 150
##
## coef exp(coef) se(coef) z Pr(>|z|)
## clinicClinic2 -1.009896 0.364257 0.214889 -4.700 2.61e-06 ***
## prisonYes 0.326555 1.386184 0.167225 1.953 0.0508 .
## methadone -0.035369 0.965249 0.006379 -5.545 2.94e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## clinicClinic2 0.3643 2.7453 0.2391 0.5550
## prisonYes 1.3862 0.7214 0.9988 1.9238
## methadone 0.9652 1.0360 0.9533 0.9774
##
## Concordance= 0.665 (se = 0.025 )
## Likelihood ratio test= 64.56 on 3 df, p=6e-14
## Wald test = 54.12 on 3 df, p=1e-11
## Score (logrank) test = 56.32 on 3 df, p=4e-12
Observing our hazard ratio coefficient confidence intervals, we can see that the prison variable CI contains 1, indicating our variable is not significant, which agrees with the respective p-value above \(\alpha = 0.05\). However we can see that the methadone ratio is indeed significant.
addicts.cox3 <- coxph(surv_object ~ clinic + methadone, data = addicts)
summary(addicts.cox3)
## Call:
## coxph(formula = surv_object ~ clinic + methadone, data = addicts)
##
## n= 238, number of events= 150
##
## coef exp(coef) se(coef) z Pr(>|z|)
## clinicClinic2 -0.950878 0.386402 0.212079 -4.484 7.34e-06 ***
## methadone -0.034327 0.966255 0.006274 -5.471 4.47e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## clinicClinic2 0.3864 2.588 0.2550 0.5855
## methadone 0.9663 1.035 0.9544 0.9782
##
## Concordance= 0.659 (se = 0.025 )
## Likelihood ratio test= 60.79 on 2 df, p=6e-14
## Wald test = 52.68 on 2 df, p=4e-12
## Score (logrank) test = 54.25 on 2 df, p=2e-12
KMcurves <- survfit(surv_object ~ clinic + methadone, data = addicts)
CoxPH <- survfit(addicts.cox, data.frame(clinic = 1:2), conf.int = F)
## Warning in model.frame.default(data = structure(list(clinic = 1:2), class =
## "data.frame", row.names = c(NA, : variable 'clinic' is not a factor
fit <- list(CoxPH = CoxPH, KapMei = KMcurves)
compPlot <-
ggsurvplot_combine(fit, data = addicts) + labs(title = "Comparison of Cox PH Curves to Kaplan-Meier")
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning in (function (survsummary, times, survtable = c("cumevents",
## "risk.table", : The length of legend.labs should be 24
## Warning in (function (survsummary, times, survtable = c("cumevents",
## "risk.table", : The length of legend.labs should be 24
## Warning in (function (survsummary, times, survtable = c("cumevents",
## "risk.table", : The length of legend.labs should be 24
ggplotly(compPlot[[1]])